{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.5/dist-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n",
      "/usr/local/lib/python3.5/dist-packages/sklearn/cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "from utils import *\n",
    "import tensorflow as tf\n",
    "from sklearn.cross_validation import train_test_split\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['negative', 'positive']\n",
      "10662\n",
      "10662\n"
     ]
    }
   ],
   "source": [
    "# Load the labelled corpus; load_files treats each sub-folder of 'data' as one class.\n",
    "trainset = sklearn.datasets.load_files(container_path='data', encoding='UTF-8')\n",
    "# Overwrite the raw documents/labels with the output of utils.separate_dataset.\n",
    "# NOTE(review): presumably splits documents into cleaned sentences -- confirm in utils.\n",
    "trainset.data, trainset.target = separate_dataset(trainset, 1.0)\n",
    "\n",
    "# Sanity check: class names, then the number of samples and the number of labels.\n",
    "print(trainset.target_names)\n",
    "for field in (trainset.data, trainset.target):\n",
    "    print(len(field))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-hot encode the integer class labels: row i gets a 1.0 in column target[i].\n",
    "ONEHOT = np.zeros((len(trainset.data), len(trainset.target_names)))\n",
    "ONEHOT[np.arange(len(trainset.data)), trainset.target] = 1.0\n",
    "\n",
    "# Hold out 20% of the samples. The three sequences are split with the same shuffle,\n",
    "# so texts, integer labels and one-hot rows stay aligned. The fixed seed makes the\n",
    "# split identical after every kernel restart, so runs can be compared.\n",
    "SPLIT_SEED = 42\n",
    "train_X, test_X, train_Y, test_Y, train_onehot, test_onehot = train_test_split(\n",
    "    trainset.data, trainset.target, ONEHOT,\n",
    "    test_size=0.2, random_state=SPLIT_SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "vocab from size: 20332\n",
      "Most common words [('film', 1453), ('movie', 1270), ('one', 727), ('like', 721), ('story', 477), ('much', 386)]\n",
      "Sample data [534, 2498, 3339, 15576, 36, 8832, 217, 150, 19, 4334] ['rock', 'destined', '21st', 'centurys', 'new', 'conan', 'hes', 'going', 'make', 'splash']\n"
     ]
    }
   ],
   "source": [
    "# Flatten the corpus into one whitespace-tokenised stream of words.\n",
    "concat = ' '.join(trainset.data).split()\n",
    "vocabulary_size = len(set(concat))\n",
    "data, count, dictionary, rev_dictionary = build_dataset(concat, vocabulary_size)\n",
    "\n",
    "print('vocab from size: %d' % vocabulary_size)\n",
    "# The slice skips the first four entries of `count`.\n",
    "# NOTE(review): presumably the GO/PAD/EOS/UNK special tokens -- confirm in build_dataset.\n",
    "print('Most common words', count[4:10])\n",
    "sample_ids = data[:10]\n",
    "print('Sample data', sample_ids, [rev_dictionary[idx] for idx in sample_ids])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Vocabulary ids of the four special tokens.\n",
    "GO, PAD, EOS, UNK = (dictionary[token] for token in ('GO', 'PAD', 'EOS', 'UNK'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def temporal_padding(x,padding=(1,1)):\n",
    "    \"\"\"Zero-pad the middle axis of a rank-3 tensor.\n",
    "\n",
    "    `padding` is (amount_before, amount_after); the first and last axes are\n",
    "    left untouched.\n",
    "    \"\"\"\n",
    "    before, after = padding[0], padding[1]\n",
    "    paddings = [[0, 0], [before, after], [0, 0]]\n",
    "    return tf.pad(x, paddings)\n",
    "\n",
    "def attention_block(x):\n",
    "    \"\"\"Scaled dot-product self-attention over the second axis of `x`.\n",
    "\n",
    "    Two bias-free linear projections of `x` are created, both as wide as the\n",
    "    last axis of `x`. Scores are the first projection multiplied by its own\n",
    "    transpose (there is no separate query projection), divided by sqrt(width)\n",
    "    and soft-maxed over all positions (no causal mask). The second projection\n",
    "    supplies the values that are mixed with those weights.\n",
    "\n",
    "    Assumes `x` is (batch, time, channels) -- TODO confirm against callers.\n",
    "    Returns a tensor with the same shape as `x`.\n",
    "    \"\"\"\n",
    "    depth = x.get_shape()[-1].value\n",
    "    key = tf.layers.dense(\n",
    "        x, units=depth, activation=None, use_bias=False,\n",
    "        kernel_initializer=tf.random_normal_initializer(0, 0.01))\n",
    "    value = tf.layers.dense(\n",
    "        x, units=depth, activation=None, use_bias=False,\n",
    "        kernel_initializer=tf.random_normal_initializer(0, 0.01))\n",
    "    scores = tf.matmul(key, key, transpose_b=True)\n",
    "    scores = scores / np.sqrt(depth)\n",
    "    attention = tf.nn.softmax(scores, name=\"attention_weights\")\n",
    "    return tf.matmul(attention, value)\n",
    "\n",
    "def convolution1d(x, num_filters, dilation_rate, k,\n",
    "                  filter_size=3, stride=[1], pad='VALID'):\n",
    "    \"\"\"Left-padded, dilated, weight-normalised 1-D convolution with a sigmoid gate.\n",
    "\n",
    "    Args:\n",
    "        x: rank-3 input; the last axis is the channel axis.\n",
    "        num_filters: number of output channels after gating.\n",
    "        dilation_rate: dilation of the convolution kernel.\n",
    "        k: integer used to name the variable scope ('conv1d_<k>').\n",
    "        filter_size: kernel width.\n",
    "        stride: strides passed to tf.nn.convolution.\n",
    "        pad: padding mode passed to tf.nn.convolution.\n",
    "\n",
    "    Returns:\n",
    "        Tensor with `num_filters` channels: linear half * sigmoid(gate half).\n",
    "    \"\"\"\n",
    "    in_channels = int(x.get_shape()[-1])\n",
    "    # Twice the requested width: one half is the signal, the other half the gate.\n",
    "    gated_filters = num_filters * 2\n",
    "    with tf.variable_scope('conv1d_%d'%(k)):\n",
    "        V = tf.get_variable('V', [filter_size, in_channels, gated_filters],\n",
    "                            tf.float32, initializer=None, trainable=True)\n",
    "        g = tf.get_variable('g', shape=[gated_filters], dtype=tf.float32,\n",
    "                            initializer=tf.constant_initializer(1.), trainable=True)\n",
    "        b = tf.get_variable('b', shape=[gated_filters], dtype=tf.float32,\n",
    "                            initializer=None, trainable=True)\n",
    "        # Weight normalisation: unit-norm direction V scaled per filter by g.\n",
    "        direction = tf.nn.l2_normalize(V, [0, 1])\n",
    "        W = tf.reshape(g, [1, 1, gated_filters]) * direction\n",
    "        # Pad only on the left so an output step never sees later input steps.\n",
    "        padded = temporal_padding(x, (dilation_rate * (filter_size - 1), 0))\n",
    "        conv = tf.nn.convolution(padded, W, pad, stride, [dilation_rate])\n",
    "        conv = tf.nn.bias_add(conv, b)\n",
    "        linear, gate = tf.split(conv, num_or_size_splits=2, axis=2)\n",
    "        return tf.multiply(linear, tf.sigmoid(gate))\n",
    "\n",
    "def temporalblock(input_layer, out_channels, filter_size, stride, dilation_rate,\n",
    "                  dropout,k,highway=False):\n",
    "    \"\"\"One residual block: two stacked (conv -> dropout -> attention) stages plus a skip.\n",
    "\n",
    "    Args:\n",
    "        input_layer: rank-3 input tensor; the last axis is the channel axis.\n",
    "        out_channels: number of channels produced by the block.\n",
    "        filter_size: kernel width of both convolutions.\n",
    "        stride: stride of both convolutions.\n",
    "        dilation_rate: dilation of both convolutions.\n",
    "        dropout: drop probability (keep_prob = 1 - dropout).\n",
    "        k: integer used to name the variable scope ('temporal_block_<k>').\n",
    "        highway: if True, use a highway gate instead of a plain residual.\n",
    "            The gate mixes a projection with the raw input, so it needs the\n",
    "            input to already have `out_channels` channels.\n",
    "\n",
    "    Returns:\n",
    "        relu(second_stage_output + skip_connection).\n",
    "    \"\"\"\n",
    "    keep_prob = 1.0 - dropout\n",
    "    in_channels = input_layer.get_shape()[-1]\n",
    "\n",
    "    def pointwise(suffix):\n",
    "        # Width-1 convolution of the block input to out_channels ('W_<suffix>', 'b_<suffix>').\n",
    "        W = tf.get_variable('W_' + suffix, [1, int(in_channels), out_channels],\n",
    "                            tf.float32, tf.random_normal_initializer(0, 0.01), trainable=True)\n",
    "        b = tf.get_variable('b_' + suffix, shape=[out_channels], dtype=tf.float32,\n",
    "                            initializer=None, trainable=True)\n",
    "        return tf.nn.bias_add(tf.nn.convolution(input_layer, W, 'SAME'), b)\n",
    "\n",
    "    with tf.variable_scope('temporal_block_%d'%(k)):\n",
    "        conv1 = convolution1d(input_layer, out_channels, dilation_rate, 0,\n",
    "                              filter_size, [stride])\n",
    "        # Broadcast the dropout mask along the middle axis: a channel is kept or\n",
    "        # dropped for a whole sequence at once.\n",
    "        noise_shape = (tf.shape(conv1)[0], tf.constant(1), tf.shape(conv1)[2])\n",
    "        stage1 = tf.nn.dropout(conv1, keep_prob, noise_shape)\n",
    "        stage1 = attention_block(stage1)\n",
    "\n",
    "        # The second stage consumes the first stage's output. Feeding the block\n",
    "        # input here instead would leave the first stage disconnected from the\n",
    "        # result, so its variables would never be trained.\n",
    "        conv2 = convolution1d(stage1, out_channels, dilation_rate, 1,\n",
    "                              filter_size, [stride])\n",
    "        stage2 = tf.nn.dropout(conv2, keep_prob, noise_shape)\n",
    "        stage2 = attention_block(stage2)\n",
    "\n",
    "        if highway:\n",
    "            H = pointwise('h')\n",
    "            T = tf.nn.sigmoid(pointwise('t'))\n",
    "            res = H*T + input_layer * (1.0 - T)\n",
    "        elif in_channels != out_channels:\n",
    "            # Channel counts differ: project the input so it can be added.\n",
    "            res = pointwise('h')\n",
    "        else:\n",
    "            print(\"no residual convolution\")\n",
    "            res = input_layer\n",
    "        return tf.nn.relu(stage2 + res)\n",
    "\n",
    "def temporal_convd(input_layer, num_channels, sequence_length, \n",
    "                   kernel_size=2, dropout=0):\n",
    "    \"\"\"Stack one temporal block per entry of `num_channels`.\n",
    "\n",
    "    Block i uses stride 1, dilation 2**i and `num_channels[i]` output channels.\n",
    "    The static shape after each block is printed. `sequence_length` is accepted\n",
    "    but not used.\n",
    "    \"\"\"\n",
    "    net = input_layer\n",
    "    for level, out_channels in enumerate(num_channels):\n",
    "        net = temporalblock(net, out_channels, kernel_size, 1, 2 ** level, dropout, level)\n",
    "        print(net.shape)\n",
    "    return net\n",
    "\n",
    "class Model:\n",
    "    \"\"\"Temporal-convolution text classifier built in the default TF1 graph.\n",
    "\n",
    "    Attributes created on construction:\n",
    "        X: int32 placeholder of token ids, shape [None, maxlen].\n",
    "        Y: float32 placeholder of label rows, shape [None, dimension_output].\n",
    "        logits: unnormalised class scores.\n",
    "        cost: mean softmax cross-entropy between `logits` and `Y`.\n",
    "        optimizer: Adam op that minimises `cost`.\n",
    "        accuracy: fraction of rows whose argmax of `logits` equals that of `Y`.\n",
    "    \"\"\"\n",
    "    def __init__(self,embedded_size,dict_size, dimension_output, learning_rate,\n",
    "                levels=5,size_layer=256,kernel_size=7,maxlen=50):\n",
    "        self.X = tf.placeholder(tf.int32, [None, maxlen])\n",
    "        self.Y = tf.placeholder(tf.float32, [None, dimension_output])\n",
    "        # Trainable embedding table, initialised uniformly in [-1, 1).\n",
    "        encoder_embeddings = tf.Variable(tf.random_uniform([dict_size, embedded_size], -1, 1))\n",
    "        encoder_embedded = tf.nn.embedding_lookup(encoder_embeddings, self.X)\n",
    "        # Every level of the stack uses the same width.\n",
    "        channel_sizes = [size_layer] * levels\n",
    "        tcn = temporal_convd(input_layer=encoder_embedded, num_channels=channel_sizes,\n",
    "                             sequence_length=maxlen, kernel_size=kernel_size)\n",
    "        # Classify from the features at the last time step only.\n",
    "        self.logits = tf.contrib.layers.fully_connected(tcn[:, -1, :], dimension_output,\n",
    "                                                        activation_fn=None)\n",
    "        # The _v2 op replaces the deprecated softmax_cross_entropy_with_logits. It only\n",
    "        # differs by letting gradients flow into `labels`; the labels here come from a\n",
    "        # placeholder, so the loss value and the training gradients are unchanged.\n",
    "        self.cost = tf.reduce_mean(\n",
    "            tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits, labels=self.Y))\n",
    "        self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)\n",
    "        correct_pred = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.Y, 1))\n",
    "        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(?, 50, 256)\n",
      "no residual convolution\n",
      "(?, 50, 256)\n",
      "no residual convolution\n",
      "(?, 50, 256)\n",
      "no residual convolution\n",
      "(?, 50, 256)\n",
      "no residual convolution\n",
      "(?, 50, 256)\n",
      "WARNING:tensorflow:From <ipython-input-6-5762d6398256>:98: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "\n",
      "Future major versions of TensorFlow will allow gradients to flow\n",
      "into the labels input on backprop by default.\n",
      "\n",
      "See tf.nn.softmax_cross_entropy_with_logits_v2.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Hyper-parameters.\n",
    "embedded_size = 128\n",
    "dimension_output = len(trainset.target_names)\n",
    "batch_size = 128\n",
    "learning_rate = 1e-3\n",
    "maxlen = 50\n",
    "\n",
    "# Start from an empty graph so re-running this cell does not collide with\n",
    "# variables left over from a previous run.\n",
    "tf.reset_default_graph()\n",
    "sess = tf.InteractiveSession()\n",
    "# Pass maxlen explicitly: the input placeholder width must equal the length the\n",
    "# batches are encoded to, rather than silently relying on Model's own default.\n",
    "model = Model(embedded_size, len(dictionary), dimension_output, learning_rate,\n",
    "              maxlen=maxlen)\n",
    "sess.run(tf.global_variables_initializer())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 0, pass acc: 0.000000, current acc: 0.495605\n",
      "time taken: 14.44695234298706\n",
      "epoch: 0, training loss: 0.699916, training acc: 0.508641, valid loss: 0.693803, valid acc: 0.495605\n",
      "\n",
      "epoch: 1, pass acc: 0.495605, current acc: 0.528809\n",
      "time taken: 11.885334014892578\n",
      "epoch: 1, training loss: 0.682211, training acc: 0.558475, valid loss: 0.693424, valid acc: 0.528809\n",
      "\n",
      "epoch: 2, pass acc: 0.528809, current acc: 0.576660\n",
      "time taken: 11.899104595184326\n",
      "epoch: 2, training loss: 0.603994, training acc: 0.672822, valid loss: 0.721133, valid acc: 0.576660\n",
      "\n",
      "epoch: 3, pass acc: 0.576660, current acc: 0.610352\n",
      "time taken: 11.887564182281494\n",
      "epoch: 3, training loss: 0.437183, training acc: 0.791075, valid loss: 0.840032, valid acc: 0.610352\n",
      "\n",
      "epoch: 4, pass acc: 0.610352, current acc: 0.630371\n",
      "time taken: 11.898242950439453\n",
      "epoch: 4, training loss: 0.293793, training acc: 0.870857, valid loss: 1.098253, valid acc: 0.630371\n",
      "\n",
      "time taken: 11.890364646911621\n",
      "epoch: 5, training loss: 0.200901, training acc: 0.916075, valid loss: 1.347229, valid acc: 0.628418\n",
      "\n",
      "epoch: 6, pass acc: 0.630371, current acc: 0.639648\n",
      "time taken: 11.902666568756104\n",
      "epoch: 6, training loss: 0.121153, training acc: 0.948982, valid loss: 1.495926, valid acc: 0.639648\n",
      "\n",
      "epoch: 7, pass acc: 0.639648, current acc: 0.649414\n",
      "time taken: 11.89632511138916\n",
      "epoch: 7, training loss: 0.064524, training acc: 0.977273, valid loss: 2.003168, valid acc: 0.649414\n",
      "\n",
      "epoch: 8, pass acc: 0.649414, current acc: 0.653809\n",
      "time taken: 11.900001049041748\n",
      "epoch: 8, training loss: 0.050920, training acc: 0.981416, valid loss: 2.490625, valid acc: 0.653809\n",
      "\n",
      "epoch: 9, pass acc: 0.653809, current acc: 0.662598\n",
      "time taken: 11.898637294769287\n",
      "epoch: 9, training loss: 0.059499, training acc: 0.977746, valid loss: 2.009929, valid acc: 0.662598\n",
      "\n",
      "time taken: 11.892675161361694\n",
      "epoch: 10, training loss: 0.045996, training acc: 0.983902, valid loss: 2.127246, valid acc: 0.657227\n",
      "\n",
      "time taken: 11.906291007995605\n",
      "epoch: 11, training loss: 0.026977, training acc: 0.989465, valid loss: 2.161261, valid acc: 0.657715\n",
      "\n",
      "epoch: 12, pass acc: 0.662598, current acc: 0.668457\n",
      "time taken: 11.895596504211426\n",
      "epoch: 12, training loss: 0.022034, training acc: 0.993253, valid loss: 2.620792, valid acc: 0.668457\n",
      "\n",
      "time taken: 11.89814829826355\n",
      "epoch: 13, training loss: 0.019271, training acc: 0.993608, valid loss: 2.251740, valid acc: 0.656250\n",
      "\n",
      "time taken: 11.896304607391357\n",
      "epoch: 14, training loss: 0.012023, training acc: 0.996686, valid loss: 2.489076, valid acc: 0.664062\n",
      "\n",
      "time taken: 11.894188642501831\n",
      "epoch: 15, training loss: 0.009401, training acc: 0.996804, valid loss: 3.078192, valid acc: 0.657715\n",
      "\n",
      "time taken: 11.891865253448486\n",
      "epoch: 16, training loss: 0.010861, training acc: 0.997159, valid loss: 2.989694, valid acc: 0.668457\n",
      "\n",
      "time taken: 11.905904531478882\n",
      "epoch: 17, training loss: 0.007594, training acc: 0.997277, valid loss: 2.649658, valid acc: 0.664062\n",
      "\n",
      "break epoch:18\n",
      "\n"
     ]
    }
   ],
   "source": [
    "EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 5, 0, 0, 0\n",
    "# Only full batches are used; a trailing partial batch is skipped.\n",
    "train_steps = len(train_X) // batch_size\n",
    "test_steps = len(test_X) // batch_size\n",
    "\n",
    "# Stop once held-out accuracy has failed to improve EARLY_STOPPING epochs in a row.\n",
    "while CURRENT_CHECKPOINT != EARLY_STOPPING:\n",
    "    lasttime = time.time()\n",
    "    train_acc, train_loss, test_acc, test_loss = 0, 0, 0, 0\n",
    "\n",
    "    # Training pass: also runs the optimizer op.\n",
    "    for step in range(train_steps):\n",
    "        start, stop = step * batch_size, (step + 1) * batch_size\n",
    "        batch_x = str_idx(train_X[start:stop], dictionary, maxlen)\n",
    "        acc, loss, _ = sess.run(\n",
    "            [model.accuracy, model.cost, model.optimizer],\n",
    "            feed_dict={model.X: batch_x, model.Y: train_onehot[start:stop]})\n",
    "        train_loss += loss\n",
    "        train_acc += acc\n",
    "\n",
    "    # Evaluation pass on the held-out split: no optimizer op, weights are unchanged.\n",
    "    for step in range(test_steps):\n",
    "        start, stop = step * batch_size, (step + 1) * batch_size\n",
    "        batch_x = str_idx(test_X[start:stop], dictionary, maxlen)\n",
    "        acc, loss = sess.run(\n",
    "            [model.accuracy, model.cost],\n",
    "            feed_dict={model.X: batch_x, model.Y: test_onehot[start:stop]})\n",
    "        test_loss += loss\n",
    "        test_acc += acc\n",
    "\n",
    "    # Turn the running sums into per-batch means.\n",
    "    train_loss /= train_steps\n",
    "    train_acc /= train_steps\n",
    "    test_loss /= test_steps\n",
    "    test_acc /= test_steps\n",
    "\n",
    "    if test_acc > CURRENT_ACC:\n",
    "        print('epoch: %d, pass acc: %f, current acc: %f'%(EPOCH,CURRENT_ACC, test_acc))\n",
    "        CURRENT_ACC = test_acc\n",
    "        CURRENT_CHECKPOINT = 0\n",
    "    else:\n",
    "        CURRENT_CHECKPOINT += 1\n",
    "\n",
    "    print('time taken:', time.time()-lasttime)\n",
    "    print('epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\\n'\n",
    "          % (EPOCH, train_loss, train_acc, test_loss, test_acc))\n",
    "    EPOCH += 1\n",
    "\n",
    "print('break epoch:%d\\n'%(EPOCH))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "             precision    recall  f1-score   support\n",
      "\n",
      "   negative       0.67      0.66      0.67      1075\n",
      "   positive       0.66      0.67      0.66      1058\n",
      "\n",
      "avg / total       0.66      0.66      0.66      2133\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Score the whole held-out split in a single forward pass, then report per-class metrics.\n",
    "test_batch = str_idx(test_X, dictionary, maxlen)\n",
    "logits = sess.run(model.logits, feed_dict={model.X: test_batch})\n",
    "predicted = np.argmax(logits, 1)\n",
    "print(metrics.classification_report(test_Y, predicted, target_names = trainset.target_names))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
